library(ade4) #Librairie qui permet l'implentation de fonctions statistiques et graphiques
library(FactoMineR) #Il permet de réaliser des analyses classiques telles que l'analyse en composantes principales (ACP), l'analyse des correspondances (AC), l'analyse des correspondances multiples (ACM) ainsi que des analyses plus avancées.
## 
## Attaching package: 'FactoMineR'
## The following object is masked from 'package:ade4':
## 
##     reconst
library(glmnet) #Permet d'ajuster l'ensemble du chemin de régularisation lasso ou élastique-net pour la régression linéaire
## Loading required package: Matrix
## Loaded glmnet 4.1-1
library(corrplot)#Permet de visualiser une matrice de corrélation par corrélogrammee
## corrplot 0.84 loaded
library(pls)
## 
## Attaching package: 'pls'
## The following object is masked from 'package:corrplot':
## 
##     corrplot
## The following object is masked from 'package:stats':
## 
##     loadings
#Importation des données
library(readr)
# Load the housing data set (read.delim() defaults: tab-separated, header row).
logtsDK <- read.delim(file = "logtsDK.csv")

# Recode the qualitative variables as 0/1 indicator columns.
# Columns 3-12 hold the numeric "size" variables, columns 13-29 the factors.
logtsDK_taille <- logtsDK[, 3:12]
logtsDK_fact <- logtsDK[, 13:29]
logtsDKnomIndic <- acm.disjonctif(logtsDK_fact)
LDK <- cbind(logtsDK_taille, logtsDKnomIndic)

# Direct ordinary least squares (OLS) fit of the rent (Loyer) on the size
# variables; the predictors are passed as a single matrix term.
logtsDK_taille <- as.matrix(logtsDK_taille)
MCO_taille <- lm(
  logtsDK$Loyer ~ logtsDK_taille,
  data = as.data.frame(logtsDK)
)
summary(MCO_taille)
## 
## Call:
## lm(formula = logtsDK$Loyer ~ logtsDK_taille, data = as.data.frame(logtsDK))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -305.39 -125.68  -42.34   99.10  913.05 
## 
## Coefficients: (2 not defined because of singularities)
##                              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                  430.0836   421.0314   1.022  0.31468   
## logtsDK_tailleSurfTerrain      1.2765     0.4273   2.987  0.00536 **
## logtsDK_tailleSurfHabitable    2.9112     2.9678   0.981  0.33400   
## logtsDK_tailleSurfPiecResid  -29.9119    16.7734  -1.783  0.08403 . 
## logtsDK_tailleNbPieces      -177.3241   225.0162  -0.788  0.43647   
## logtsDK_tailleNbPiecesResid  254.2928   300.7181   0.846  0.40404   
## logtsDK_tailleNbSDB          224.3603   240.5155   0.933  0.35789   
## logtsDK_tailleNbChamBur     -209.9237    99.3588  -2.113  0.04251 * 
## logtsDK_tailleNbSalonsSAM          NA         NA      NA       NA   
## logtsDK_tailleNbWC           384.2636   218.9312   1.755  0.08880 . 
## logtsDK_tailleNbCuis               NA         NA      NA       NA   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 239.8 on 32 degrees of freedom
## Multiple R-squared:  0.7541, Adjusted R-squared:  0.6926 
## F-statistic: 12.26 on 8 and 32 DF,  p-value: 7.887e-08
# PCA theme by theme
# Size theme: PCA() is called with its defaults, so it also draws its graphs.
PCA1 <- PCA(X = logtsDK_taille)
## Warning: ggrepel: 16 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# Variables graph of the fitted PCA.
plot(PCA1, choix = "var")

# Squared cosines (cos2) of the variables on each retained dimension.
PCA1[["var"]][["cos2"]]
##                   Dim.1        Dim.2       Dim.3        Dim.4        Dim.5
## SurfTerrain   0.6957596 0.0023491231 0.147534287 0.1007699984 1.035906e-03
## SurfHabitable 0.9652200 0.0075977653 0.003656496 0.0041111908 6.947176e-03
## SurfPiecResid 0.5601628 0.0668936381 0.289068886 0.0536207377 1.248462e-02
## NbPieces      0.9676251 0.0004346449 0.030462967 0.0001964992 7.727818e-05
## NbPiecesResid 0.9137550 0.0030160610 0.068883614 0.0002818111 9.413272e-03
## NbSDB         0.8300173 0.0054225362 0.000296253 0.0427220693 4.366669e-02
## NbChamBur     0.8453092 0.0013572035 0.096734857 0.0081218865 4.729963e-02
## NbSalonsSAM   0.7644196 0.0711421011 0.009720707 0.0739533555 4.454982e-02
## NbWC          0.7980084 0.0582834622 0.002742312 0.0059807346 9.595988e-03
## NbCuis        0.1360579 0.8006677932 0.041336963 0.0006572742 3.691325e-03
# Correlogram of the variables' cos2 for the size-theme PCA.
# pls is attached after corrplot and masks corrplot(), so the call is
# namespace-qualified to reach the correlogram function. cos2 values are not
# correlations, hence is.corr = FALSE.
corrplot::corrplot(PCA1$var$cos2, is.corr = FALSE)

# Eigenvalues of the size-theme PCA, with the percentage and cumulative
# percentage of variance of each component.
PCA1[["eig"]]
##           eigenvalue percentage of variance cumulative percentage of variance
## comp 1  7.476335e+00           7.476335e+01                          74.76335
## comp 2  1.017164e+00           1.017164e+01                          84.93499
## comp 3  6.904373e-01           6.904373e+00                          91.83937
## comp 4  2.904156e-01           2.904156e+00                          94.74352
## comp 5  1.787617e-01           1.787617e+00                          96.53114
## comp 6  1.754505e-01           1.754505e+00                          98.28564
## comp 7  1.597364e-01           1.597364e+00                          99.88301
## comp 8  1.169930e-02           1.169930e-01                         100.00000
## comp 9  3.136885e-30           3.136885e-29                         100.00000
## comp 10 2.524442e-32           2.524442e-31                         100.00000
# Bar chart of the percentage of variance carried by each component.
barplot(PCA1$eig[, "percentage of variance"])

# Second PCA of the same matrix with prcomp() defaults
# (variables are centred but not rescaled).
pca1 <- prcomp(logtsDK_taille)

# Growth of the eigenvalues

# Loadings: one column per principal component.
pca1[["rotation"]]
##                         PC1           PC2          PC3          PC4
## SurfTerrain   -0.9015978499  4.324693e-01 -0.009169902 -0.001711798
## SurfHabitable -0.4312907882 -8.991760e-01 -0.018069683  0.070191734
## SurfPiecResid -0.0253149027 -3.480241e-02  0.933374069 -0.341462769
## NbPieces      -0.0167691961 -4.159075e-02 -0.243539707 -0.751182081
## NbPiecesResid -0.0102744528 -3.000054e-02 -0.201291721 -0.374396168
## NbSDB         -0.0031195791 -4.901760e-03 -0.023623484 -0.209235806
## NbChamBur     -0.0073873933 -2.181098e-02 -0.162214303 -0.329431970
## NbSalonsSAM   -0.0028870594 -8.189555e-03 -0.039077418 -0.044964198
## NbWC          -0.0030333584 -6.704037e-03 -0.008944071 -0.136986969
## NbCuis        -0.0003418059  1.558701e-05 -0.009680432 -0.030563138
##                         PC5          PC6          PC7           PC8
## SurfTerrain   -0.0007162984 -0.001429731 -0.001223207  0.0007223059
## SurfHabitable  0.0013259133  0.011541967  0.007923196 -0.0044300523
## SurfPiecResid -0.0376762163 -0.088164094 -0.030592031  0.0153523845
## NbPieces       0.2724491793  0.190508482  0.019356024 -0.2218569125
## NbPiecesResid -0.0841160278 -0.519627551 -0.175089739  0.2863344421
## NbSDB          0.1233769467  0.283891985  0.769203385  0.2320613238
## NbChamBur     -0.7018008002 -0.116556805 -0.035405338  0.0111605355
## NbSalonsSAM    0.6176847725 -0.403070746 -0.139684401  0.2751739065
## NbWC           0.0965307868  0.624398932 -0.595888538  0.1119792231
## NbCuis         0.1366574735 -0.198154883  0.021130916 -0.8522319016
##                         PC9          PC10
## SurfTerrain    2.803548e-18 -1.654866e-18
## SurfHabitable  5.060608e-18  1.311200e-17
## SurfPiecResid -4.768563e-17 -3.850923e-18
## NbPieces      -3.772700e-01  2.682407e-01
## NbPiecesResid -8.304163e-02 -6.493655e-01
## NbSDB          3.772700e-01 -2.682407e-01
## NbChamBur      4.603116e-01  3.811248e-01
## NbSalonsSAM    4.603116e-01  3.811248e-01
## NbWC           3.772700e-01 -2.682407e-01
## NbCuis         3.772700e-01 -2.682407e-01
pca1[["sdev"]] # "raw" standard deviations of the principal components
##  [1] 2.165710e+02 5.324754e+01 4.314768e+00 1.082570e+00 6.356562e-01
##  [6] 4.952575e-01 3.589661e-01 1.834611e-01 6.331211e-16 1.297068e-16
# Principal component regression: the rent is regressed on the individuals'
# coordinates on the dimensions kept by PCA1.
pcr1 <- lm(
  logtsDK$Loyer ~ PCA1$ind$coord,
  data = as.data.frame(logtsDK)
)
summary(pcr1)
## 
## Call:
## lm(formula = logtsDK$Loyer ~ PCA1$ind$coord, data = as.data.frame(logtsDK))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -436.25 -184.03  -10.35  121.94  908.92 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           356.05      40.87   8.712 2.75e-10 ***
## PCA1$ind$coordDim.1   117.40      14.95   7.854 3.13e-09 ***
## PCA1$ind$coordDim.2   -35.28      40.52  -0.871   0.3899    
## PCA1$ind$coordDim.3    78.97      49.19   1.605   0.1174    
## PCA1$ind$coordDim.4  -105.36      75.84  -1.389   0.1735    
## PCA1$ind$coordDim.5  -260.68      96.66  -2.697   0.0107 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 261.7 on 35 degrees of freedom
## Multiple R-squared:  0.6796, Adjusted R-squared:  0.6338 
## F-statistic: 14.85 on 5 and 35 DF,  p-value: 7.961e-08
# In the principal component regression above, the most useful components
# are the first one and the last one kept (dimension 5).
# OLS restricted to principal components 1 and 5:
MCO_taille2 <- lm(
  logtsDK$Loyer ~ PCA1$ind$coord[, c(1, 5)],
  data = as.data.frame(logtsDK)
)
summary(MCO_taille2)
## 
## Call:
## lm(formula = logtsDK$Loyer ~ PCA1$ind$coord[, c(1, 5)], data = as.data.frame(logtsDK))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -471.72 -166.46   -2.39  128.45  865.44 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      356.05      42.07   8.463 2.84e-10 ***
## PCA1$ind$coord[, c(1, 5)]Dim.1   117.40      15.39   7.630 3.51e-09 ***
## PCA1$ind$coord[, c(1, 5)]Dim.5  -260.68      99.50  -2.620   0.0126 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 269.4 on 38 degrees of freedom
## Multiple R-squared:  0.6314, Adjusted R-squared:  0.612 
## F-statistic: 32.54 on 2 and 38 DF,  p-value: 5.826e-09
##C'est plus intéressant de faire la régression sur les 2 composantes que sur une seule. Ainsi, y'a pas de confusion 
##entre les composantes; c'est la vraie-significativité. 

# Quality theme: indicator columns 11 to 33 of LDK.
logtsDK_qualite <- LDK[, 11:33]
# The argument is spelled out in full: `scale = TRUE` only reached
# `scale.unit` through partial argument matching.
PCA2 <- PCA(logtsDK_qualite, scale.unit = TRUE)

# Correlogram of the variables' cos2.
# pls is attached after corrplot and masks corrplot(), so the call is
# namespace-qualified to reach the correlogram function. cos2 values are not
# correlations, hence is.corr = FALSE.
corrplot::corrplot(PCA2$var$cos2, is.corr = FALSE)

# Eigenvalues of the quality-theme PCA, with the percentage and cumulative
# percentage of variance of each component.
PCA2[["eig"]]
##           eigenvalue percentage of variance cumulative percentage of variance
## comp 1  7.379811e+00           3.208613e+01                          32.08613
## comp 2  3.105184e+00           1.350080e+01                          45.58693
## comp 3  2.523830e+00           1.097317e+01                          56.56011
## comp 4  1.885202e+00           8.196528e+00                          64.75663
## comp 5  1.551232e+00           6.744489e+00                          71.50112
## comp 6  1.387970e+00           6.034654e+00                          77.53578
## comp 7  1.211577e+00           5.267725e+00                          82.80350
## comp 8  1.073695e+00           4.668239e+00                          87.47174
## comp 9  9.157240e-01           3.981409e+00                          91.45315
## comp 10 7.103919e-01           3.088660e+00                          94.54181
## comp 11 6.577247e-01           2.859673e+00                          97.40148
## comp 12 2.544728e-01           1.106403e+00                          98.50789
## comp 13 2.322656e-01           1.009851e+00                          99.51774
## comp 14 1.109207e-01           4.822639e-01                         100.00000
## comp 15 4.363338e-30           1.897104e-29                         100.00000
## comp 16 6.547955e-31           2.846937e-30                         100.00000
## comp 17 1.824182e-31           7.931227e-31                         100.00000
## comp 18 1.084392e-31           4.714747e-31                         100.00000
## comp 19 6.367497e-32           2.768477e-31                         100.00000
## comp 20 4.914188e-32           2.136604e-31                         100.00000
## comp 21 3.434781e-32           1.493383e-31                         100.00000
## comp 22 2.695648e-32           1.172021e-31                         100.00000
## comp 23 1.002895e-32           4.360411e-32                         100.00000
# Bar chart of the percentage of variance carried by each component.
barplot(PCA2$eig[, "percentage of variance"])

# Principal component regression: the rent is regressed on the individuals'
# coordinates on the dimensions kept by PCA2.
pcr2 <- lm(
  logtsDK$Loyer ~ PCA2$ind$coord,
  data = as.data.frame(logtsDK)
)
summary(pcr2)
## 
## Call:
## lm(formula = logtsDK$Loyer ~ PCA2$ind$coord, data = as.data.frame(logtsDK))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -490.29 -116.25  -59.55   21.36 1202.08 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           356.05      49.33   7.217 2.01e-08 ***
## PCA2$ind$coordDim.1   111.17      18.16   6.122 5.34e-07 ***
## PCA2$ind$coordDim.2     5.61      28.00   0.200    0.842    
## PCA2$ind$coordDim.3   -10.22      31.05  -0.329    0.744    
## PCA2$ind$coordDim.4    52.77      35.93   1.469    0.151    
## PCA2$ind$coordDim.5    16.91      39.61   0.427    0.672    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 315.9 on 35 degrees of freedom
## Multiple R-squared:  0.5331, Adjusted R-squared:  0.4664 
## F-statistic: 7.993 on 5 and 35 DF,  p-value: 4.147e-05
# OLS on the first principal component only:
MCO_qualite <- lm(
  logtsDK$Loyer ~ PCA2$ind$coord[, 1],
  data = as.data.frame(logtsDK)
)
summary(MCO_qualite)
## 
## Call:
## lm(formula = logtsDK$Loyer ~ PCA2$ind$coord[, 1], data = as.data.frame(logtsDK))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -467.03 -141.66  -56.32   28.14 1297.99 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           356.05      48.37   7.361 6.87e-09 ***
## PCA2$ind$coord[, 1]   111.17      17.80   6.244 2.37e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 309.7 on 39 degrees of freedom
## Multiple R-squared:  0.4999, Adjusted R-squared:  0.4871 
## F-statistic: 38.99 on 1 and 39 DF,  p-value: 2.374e-07
# Location (situation) theme: indicator columns 34 to 50 of LDK.
logtsDK_situation <- LDK[, 34:50]
PCA3 <- PCA(logtsDK_situation)
## Warning: ggrepel: 7 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# Correlogram of the variables' cos2.
# pls is attached after corrplot and masks corrplot(), so the call is
# namespace-qualified to reach the correlogram function. cos2 values are not
# correlations, hence is.corr = FALSE.
corrplot::corrplot(PCA3$var$cos2, is.corr = FALSE)

# Eigenvalues of the situation-theme PCA, with the percentage and cumulative
# percentage of variance of each component.
PCA3[["eig"]]
##           eigenvalue percentage of variance cumulative percentage of variance
## comp 1  4.601369e+00           2.706688e+01                          27.06688
## comp 2  4.061235e+00           2.388962e+01                          50.95650
## comp 3  2.429751e+00           1.429265e+01                          65.24915
## comp 4  2.200733e+00           1.294549e+01                          78.19463
## comp 5  1.242892e+00           7.311132e+00                          85.50577
## comp 6  1.020835e+00           6.004912e+00                          91.51068
## comp 7  5.795349e-01           3.409029e+00                          94.91971
## comp 8  4.129103e-01           2.428884e+00                          97.34859
## comp 9  3.013460e-01           1.772624e+00                          99.12121
## comp 10 1.493937e-01           8.787865e-01                         100.00000
## comp 11 2.041883e-30           1.201108e-29                         100.00000
## comp 12 6.078277e-31           3.575457e-30                         100.00000
## comp 13 2.049065e-31           1.205333e-30                         100.00000
## comp 14 1.091975e-31           6.423380e-31                         100.00000
## comp 15 5.527018e-32           3.251187e-31                         100.00000
## comp 16 2.333037e-32           1.372375e-31                         100.00000
## comp 17 7.061867e-33           4.154039e-32                         100.00000
# Bar chart of the percentage of variance carried by each component.
barplot(PCA3$eig[, "percentage of variance"])

# Principal component regression: the rent is regressed on the individuals'
# coordinates on the dimensions kept by PCA3.
pcr3 <- lm(
  logtsDK$Loyer ~ PCA3$ind$coord,
  data = as.data.frame(logtsDK)
)
summary(pcr3)
## 
## Call:
## lm(formula = logtsDK$Loyer ~ PCA3$ind$coord, data = as.data.frame(logtsDK))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -605.80 -121.34  -11.22   82.76 1294.20 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           356.05      50.29   7.080 3.01e-08 ***
## PCA3$ind$coordDim.1   126.92      23.44   5.414 4.58e-06 ***
## PCA3$ind$coordDim.2   -19.05      24.95  -0.764   0.4502    
## PCA3$ind$coordDim.3   -82.66      32.26  -2.562   0.0149 *  
## PCA3$ind$coordDim.4   -23.54      33.90  -0.695   0.4919    
## PCA3$ind$coordDim.5   -20.56      45.11  -0.456   0.6513    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 322 on 35 degrees of freedom
## Multiple R-squared:  0.5149, Adjusted R-squared:  0.4456 
## F-statistic:  7.43 on 5 and 35 DF,  p-value: 7.727e-05
# OLS on principal components 1 and 3 of PCA3:
MCO_surface <- lm(
  logtsDK$Loyer ~ PCA3$ind$coord[, c(1, 3)],
  data = as.data.frame(logtsDK)
)
summary(MCO_surface)
## 
## Call:
## lm(formula = logtsDK$Loyer ~ PCA3$ind$coord[, c(1, 3)], data = as.data.frame(logtsDK))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -534.04 -123.33  -10.44  105.96 1365.96 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      356.05      49.13   7.247 1.14e-08 ***
## PCA3$ind$coord[, c(1, 3)]Dim.1   126.92      22.90   5.541 2.42e-06 ***
## PCA3$ind$coord[, c(1, 3)]Dim.3   -82.66      31.52  -2.623   0.0125 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 314.6 on 38 degrees of freedom
## Multiple R-squared:  0.4973, Adjusted R-squared:  0.4708 
## F-statistic: 18.79 on 2 and 38 DF,  p-value: 2.116e-06
# All themes at once: PCA on the complete LDK table.
PCAT <- PCA(X = LDK)

# Principal component regression of the rent on the dimensions kept by PCAT.
pcrT <- lm(
  logtsDK$Loyer ~ PCAT$ind$coord,
  data = as.data.frame(logtsDK)
)
summary(pcrT)
## 
## Call:
## lm(formula = logtsDK$Loyer ~ PCAT$ind$coord, data = as.data.frame(logtsDK))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -274.66  -96.46  -31.16   71.71  845.08 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           356.05      33.83  10.524 2.19e-12 ***
## PCAT$ind$coordDim.1    78.17       8.14   9.603 2.42e-11 ***
## PCAT$ind$coordDim.2   -63.89      13.96  -4.578 5.70e-05 ***
## PCAT$ind$coordDim.3    23.61      15.09   1.565  0.12655    
## PCAT$ind$coordDim.4   -12.38      15.83  -0.782  0.43931    
## PCAT$ind$coordDim.5    48.99      17.15   2.856  0.00718 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 216.6 on 35 degrees of freedom
## Multiple R-squared:  0.7804, Adjusted R-squared:  0.749 
## F-statistic: 24.88 on 5 and 35 DF,  p-value: 1.295e-10
# Rent model on all themes simultaneously, restricted to dimensions
# 1, 2 and 5 of PCAT:
MCO_T <- lm(
  logtsDK$Loyer ~ PCAT$ind$coord[, c(1, 2, 5)],
  data = as.data.frame(logtsDK)
)
summary(MCO_T)
## 
## Call:
## lm(formula = logtsDK$Loyer ~ PCAT$ind$coord[, c(1, 2, 5)], data = as.data.frame(logtsDK))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -288.35  -99.20  -28.09   59.82  885.65 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        356.049     34.315  10.376 1.66e-12 ***
## PCAT$ind$coord[, c(1, 2, 5)]Dim.1   78.171      8.256   9.468 1.99e-11 ***
## PCAT$ind$coord[, c(1, 2, 5)]Dim.2  -63.893     14.155  -4.514 6.26e-05 ***
## PCAT$ind$coord[, c(1, 2, 5)]Dim.5   48.986     17.399   2.815  0.00776 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 219.7 on 37 degrees of freedom
## Multiple R-squared:  0.7612, Adjusted R-squared:  0.7418 
## F-statistic: 39.31 on 3 and 37 DF,  p-value: 1.356e-11
#Coefficients des variables reconstitués selon la RCP :


# Hierarchical clustering.
# NOTE: dist() below works on the rows of LDK_cr, so the tree groups the
# observations (dwellings); the columns (variables) are only used afterwards
# to measure how well each partition explains them.
library(ClustOfVar)

# R-squared of each column of `data` regressed on a partition.
#   data    : numeric matrix, one row per observation.
#   classes : class label of each observation (coerced to a factor).
# Returns a numeric vector with one R-squared per column of `data`.
partition_r2 <- function(data, classes) {
  classes <- as.factor(classes)
  vapply(
    seq_len(ncol(data)),
    function(j) summary(lm(data[, j] ~ classes))$r.squared,
    numeric(1)
  )
}

LDK <- cbind(logtsDK_taille, logtsDKnomIndic)

# Standardise the variables. scale() divides by the sample standard deviation
# (n - 1 denominator); multiplying by sqrt(n / (n - 1)) gives unit population
# variance. n is taken from the data instead of the former hard-coded 81/82,
# which used the number of columns and the inverse ratio.
n_obs <- nrow(LDK)
LDK_cr <- scale(LDK) * sqrt(n_obs / (n_obs - 1))

# Euclidean distances between observations, aggregated with the Ward index.
dv <- dist(LDK_cr, method = "euclidean")
CAH <- hclust(d = dv, method = "ward.D")

# Dendrogram
plot(CAH)

# Cut the tree into k = 2 classes.
PV2 <- cutree(tree = CAH, k = 2)

# R-squared of every variable with the class variable, kept as a one-column
# matrix, then the partition R-squared (their mean).
R2_PV2 <- cbind(partition_r2(LDK_cr, PV2))
R2P_PV2 <- mean(R2_PV2)

# Partition R-squared for k = 2, ..., 9 classes: V[i] is for k = i + 1.
# The mean is taken once per partition, after all the columns are processed.
V <- numeric(8)
for (i in seq_along(V)) {
  PV <- cutree(tree = CAH, k = i + 1)
  R2_PV <- cbind(partition_r2(LDK_cr, PV))
  V[i] <- mean(R2_PV)
}
## Warning in summary.lm(lm(LDK_cr[, j] ~ as.factor(PV))): essentially perfect fit:
## summary may be unreliable
# Print the partition R-squared values stored in V
# (one entry per number of classes tried).
V
## [1] 0.1457266 0.2215363 0.2802125 0.3256438 0.3703531 0.4066013 0.4389535
## [8] 0.4682740
##Le vecteur V ci-dessus contient le R^2 de chaque partition (de k = 2 à k = 9 classes).
##D'après ces R^2, on peut retenir la partition en 7 classes :
##en effet, le gain de R^2 en passant de 6 à 7 classes est plus élevé que celui obtenu en passant de 7 à 8 classes.

# Inertia jumps: aggregation heights of the tree, largest first.
inertie <- rev(sort(CAH$height))
plot(
  inertie[1:20],
  type = "s",
  xlab = "Nombre de classes",
  ylab = "Inertie"
)

# Same curve again, this time with the 2nd and 4th heights highlighted.
plot(
  inertie[1:20],
  type = "s",
  xlab = "Nombre de classes",
  ylab = "Inertie"
)
points(
  c(2, 4),
  inertie[c(2, 4)],
  col = c("green3", "red3"),
  cex = 2,
  lwd = 3
)

# Description of the partition into 4 classes.
P4 <- cutree(CAH, k = 4)

# NOTE(review): summary() of the integer labels reports quantiles of the
# class numbers; table(P4) would report the class sizes instead.
summary(P4)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   2.000   3.000   2.927   4.000   4.000
# PLS regression of column 2 of logtsDK (presumably the rent, Loyer; confirm)
# on all LDK predictors, with leave-one-out cross-validation.
LDKpls <- plsr(
  as.matrix(logtsDK[, 2]) ~ as.matrix(LDK),
  validation = "LOO"
)
# PRESS of the cross-validation for each number of components.
LDKpls[["validation"]][["PRESS"]]
##                         1 comps 2 comps 3 comps 4 comps 5 comps 6 comps 7 comps
## as.matrix(logtsDK[, 2]) 3201973 3706750 3309537 2994531 3232933 3617638 4084327
##                         8 comps 9 comps 10 comps 11 comps 12 comps 13 comps
## as.matrix(logtsDK[, 2]) 4048026 3973410  4052785  4239620  4859660  5057962
##                         14 comps 15 comps 16 comps 17 comps 18 comps 19 comps
## as.matrix(logtsDK[, 2])  5509839  6006132  6228561  6465885  6777863  6985848
##                         20 comps 21 comps 22 comps 23 comps 24 comps 25 comps
## as.matrix(logtsDK[, 2])  7051845  7081969  7082304  7055691  7041062  7032221
##                         26 comps 27 comps 28 comps 29 comps 30 comps 31 comps
## as.matrix(logtsDK[, 2])  7023471  7009713  6998962  6992518  6992701  6992704
##                         32 comps 33 comps 34 comps 35 comps 36 comps 37 comps
## as.matrix(logtsDK[, 2])  6992663  6992626  6992625  6992624  6992624  6992624
##                         38 comps 39 comps
## as.matrix(logtsDK[, 2])  6992624  6992624
# Bar chart of the cross-validated PRESS per number of components.
barplot(LDKpls[["validation"]][["PRESS"]])

plot(LDKpls)

# Refit with the number of components retained (one).
LDKpls1 <- plsr(
  as.matrix(logtsDK[, 2]) ~ as.matrix(LDK),
  ncomp = 1
)
# Correlation between the response and the 1-component fitted values ...
cor(logtsDK[, 2], LDKpls1$fitted.values[, 1, 1])
## [1] 0.803004
# ... and its square.
cor(logtsDK[, 2], LDKpls1$fitted.values[, 1, 1])^2
## [1] 0.6448155
plot(LDKpls1)

# Switch to the log of the response y to predict (here, the rent).
LDKLogYpls <- plsr(
  as.matrix(log(logtsDK[, 2])) ~ as.matrix(LDK),
  validation = "LOO"
)
plot(LDKLogYpls)

LDKLogYpls1 <- plsr(
  as.matrix(log(logtsDK[, 2])) ~ as.matrix(LDK),
  ncomp = 1
)
barplot(LDKLogYpls[["validation"]][["PRESS"]])

# Cross-validated RMSEP curve of the log-response model.
plot(RMSEP(LDKLogYpls), legendpos = "topright")

summary(LDKLogYpls)
## Data:    X dimension: 41 82 
##  Y dimension: 41 1
## Fit method: kernelpls
## Number of components considered: 39
## 
## VALIDATION: RMSEP
## Cross-validated using 41 leave-one-out segments.
##        (Intercept)  1 comps  2 comps  3 comps  4 comps  5 comps  6 comps
## CV           1.208   0.7991   0.7299   0.6763   0.3805   0.3095   0.3121
## adjCV        1.208   0.7985   0.7294   0.6696   0.3786   0.3074   0.3107
##        7 comps  8 comps  9 comps  10 comps  11 comps  12 comps  13 comps
## CV      0.3284   0.3356   0.3337    0.3352    0.3376    0.3433    0.3516
## adjCV   0.3263   0.3323   0.3303    0.3319    0.3340    0.3392    0.3474
##        14 comps  15 comps  16 comps  17 comps  18 comps  19 comps  20 comps
## CV       0.3541    0.3489    0.3428    0.3362    0.3343    0.3339    0.3340
## adjCV    0.3498    0.3447    0.3386    0.3321    0.3302    0.3298    0.3299
##        21 comps  22 comps  23 comps  24 comps  25 comps  26 comps  27 comps
## CV       0.3350    0.3367    0.3371    0.3371    0.3372    0.3373    0.3374
## adjCV    0.3309    0.3326    0.3330    0.3330    0.3331    0.3331    0.3333
##        28 comps  29 comps  30 comps  31 comps  32 comps  33 comps  34 comps
## CV       0.3375    0.3375    0.3375    0.3375    0.3375    0.3375    0.3375
## adjCV    0.3333    0.3333    0.3333    0.3333    0.3333    0.3333    0.3333
##        35 comps  36 comps  37 comps  38 comps  39 comps
## CV       0.3375    0.3375    0.3375    0.3375    0.3375
## adjCV    0.3333    0.3333    0.3333    0.3333    0.3333
## 
## TRAINING: % variance explained
##                               1 comps  2 comps  3 comps  4 comps  5 comps
## X                               94.21    99.95    99.96    99.99    99.99
## as.matrix(log(logtsDK[, 2]))    59.22    65.68    85.60    94.54    97.49
##                               6 comps  7 comps  8 comps  9 comps  10 comps
## X                               99.99    99.99    99.99    99.99     99.99
## as.matrix(log(logtsDK[, 2]))    98.14    98.72    99.39    99.57     99.68
##                               11 comps  12 comps  13 comps  14 comps  15 comps
## X                                99.99     99.99    100.00    100.00    100.00
## as.matrix(log(logtsDK[, 2]))     99.82     99.91     99.95     99.97     99.98
##                               16 comps  17 comps  18 comps  19 comps  20 comps
## X                               100.00    100.00    100.00    100.00       100
## as.matrix(log(logtsDK[, 2]))     99.98     99.99     99.99     99.99       100
##                               21 comps  22 comps  23 comps  24 comps  25 comps
## X                                  100       100       100       100       100
## as.matrix(log(logtsDK[, 2]))       100       100       100       100       100
##                               26 comps  27 comps  28 comps  29 comps  30 comps
## X                                  100       100       100       100       100
## as.matrix(log(logtsDK[, 2]))       100       100       100       100       100
##                               31 comps  32 comps  33 comps  34 comps  35 comps
## X                                  100       100       100       100       100
## as.matrix(log(logtsDK[, 2]))       100       100       100       100       100
##                               36 comps  37 comps  38 comps  39 comps
## X                                  100       100       100       100
## as.matrix(log(logtsDK[, 2]))       100       100       100       100
# Prediction of the log-response with 5 components.
LDKLogYpls5 <- plsr(
  as.matrix(log(logtsDK[, 2])) ~ as.matrix(LDK),
  ncomp = 5
)
plot(LDKLogYpls5)

# Correlation between the log-response and the 5-component fitted values ...
cor(log(logtsDK[, 2]), LDKLogYpls5$fitted.values[, 1, 5])
## [1] 0.9873553
# ... and its square.
cor(log(logtsDK[, 2]), LDKLogYpls5$fitted.values[, 1, 5])^2
## [1] 0.9748705
# R^2 = 0.975 is obtained; the number of components is the tuning parameter.
plot(LDKLogYpls, ncomp = 5, line = TRUE)

# Correlations between every predictor and the 5 PLS score vectors.
cor(LDK, LDKLogYpls5$scores)
##                               Comp 1       Comp 2        Comp 3        Comp 4
## SurfTerrain              0.990385171 -0.138337005  0.0002398295 -1.663050e-04
## SurfHabitable            0.899451517  0.437019258 -0.0002748108 -7.809467e-04
## SurfPiecResid            0.781935836  0.245471091 -0.3225266785  4.706671e-01
## NbPieces                 0.824551638  0.479147822  0.2209499379 -1.310730e-01
## NbPiecesResid            0.774878198  0.531747845  0.1924836334 -2.282014e-01
## NbSDB                    0.819733245  0.296924813  0.2283418196  1.282359e-02
## NbChamBur                0.747040090  0.518401539  0.1499996248 -2.888039e-01
## NbSalonsSAM              0.704514616  0.469560994  0.2608879852 -4.135583e-02
## NbWC                     0.778695118  0.402030140  0.2117711081  9.608673e-02
## NbCuis                   0.330904755 -0.010502152  0.2421217408 -5.424413e-02
## Type.Appart             -0.718821245 -0.413137418  0.1973739471 -6.268795e-02
## Type.Villa               0.718821245  0.413137418 -0.1973739471  6.268795e-02
## Standing.Non            -0.500981659 -0.109997886 -0.4229686883 -3.248913e-01
## Standing.Oui             0.500981659  0.109997886  0.4229686883  3.248913e-01
## Etat.Bon                -0.007531767  0.367046574  0.1712680760  1.549622e-01
## Etat.Mediocre           -0.276836129 -0.259233126 -0.3557440952 -1.038299e-01
## Etat.Neuf                0.438593150 -0.142440935  0.1668706209 -8.364715e-02
## Etat.Vetuste            -0.148257063 -0.057650699  0.0354129438  7.773829e-05
## Jardin.Non              -0.565733717  0.172829170 -0.1195449375 -2.118147e-01
## Jardin.Oui               0.565733717 -0.172829170  0.1195449375  2.118147e-01
## Cour.Non                -0.274585332 -0.183944205 -0.2041000168  1.007354e-01
## Cour.Oui                 0.274585332  0.183944205  0.2041000168 -1.007354e-01
## Piscine.Non             -0.464580446 -0.243198215  0.0694250700 -1.086425e-01
## Piscine.Oui              0.464580446  0.243198215 -0.0694250700  1.086425e-01
## Garage.Non              -0.613058918 -0.337048835 -0.1476295320 -2.527746e-01
## Garage.Park             -0.151462408 -0.167604062  0.2792691515  1.765326e-01
## Garage.Priv1v            0.349468962  0.256302756 -0.1524947227  2.287816e-01
## Garage.Priv2v            0.589624226  0.324000393  0.1374726965 -1.354518e-01
## Egout.Non                0.475953892  0.297408723  0.0206252914 -3.502615e-02
## Egout.Oui               -0.475953892 -0.297408723 -0.0206252914  3.502615e-02
## HiTech.1inst             0.291059952  0.323368977 -0.1178810275  1.222242e-01
## HiTech.2inst             0.617303193 -0.202415682  0.1505408677 -2.536878e-01
## HiTech.Non              -0.595130964 -0.178382636  0.0233084470  2.888423e-02
## DistCtrVille.0          -0.140677622 -0.027225363  0.5369006564  2.868468e-01
## DistCtrVille.1a5km       0.033989509 -0.100644522 -0.0246627000  7.288228e-02
## DistCtrVille.inf1km     -0.216815170 -0.258112330 -0.0573307347 -1.218231e-01
## DistCtrVille.sup5km      0.211602778  0.326656033 -0.2356661006 -1.496103e-01
## Commerc.inf2km          -0.351804766 -0.188096974  0.2426887341 -9.768146e-02
## Commerc.sup2km           0.351804766  0.188096974 -0.2426887341  9.768146e-02
## BordMer.inf2km           0.387925350  0.224403526  0.1553957317  5.972429e-02
## BordMer.sup2km          -0.387925350 -0.224403526 -0.1553957317 -5.972429e-02
## Distractions.inf2km      0.284134864 -0.073016655  0.2975377804  4.960380e-01
## Distractions.sup2km     -0.284134864  0.073016655 -0.2975377804 -4.960380e-01
## AxeRoutier.inf1km        0.290641094  0.061459852  0.4691600173  3.895813e-01
## AxeRoutier.sup1km       -0.290641094 -0.061459852 -0.4691600173 -3.895813e-01
## StandingQuartier.bourge  0.534371455  0.146267931  0.5427083165  2.828916e-01
## StandingQuartier.moy    -0.081441591  0.029126777 -0.2250878173  5.573684e-02
## StandingQuartier.popu   -0.445495427 -0.168644389 -0.3231266182 -3.256276e-01
## QuartierAffaires.Non     0.192673487  0.070652789 -0.5061387469 -2.535940e-01
## QuartierAffaires.Oui    -0.192673487 -0.070652789  0.5061387469  2.535940e-01
## Quartier.Almadies        0.267527027  0.118487272  0.1507590673 -1.561407e-01
## Quartier.BelAir          0.087949304 -0.037963809 -0.0976302333  2.217052e-01
## Quartier.Bopp           -0.107717352 -0.054978010  0.0371779190 -2.027142e-01
## Quartier.Castors        -0.033012113  0.014718483 -0.0946641387 -2.295350e-01
## Quartier.Colobane       -0.124947223 -0.078655490 -0.0427452037 -2.103595e-02
## Quartier.Derkle         -0.143361780 -0.104341844 -0.0836473921 -2.669521e-01
## Quartier.Fann           -0.081191517 -0.018142966  0.2728999137  1.369320e-01
## Quartier.FannHock        0.079167161  0.241426741  0.2460214936 -5.546517e-02
## Quartier.FannResidence   0.594489969 -0.401145388  0.0594598140 -1.981150e-01
## Quartier.Fass           -0.096783516 -0.244324838 -0.3629096549  1.491921e-01
## Quartier.FenetreMermoz  -0.086290716 -0.025163659  0.2460369783  8.282680e-02
## Quartier.Foire           0.141188087  0.113702053 -0.1466182475  8.111606e-02
## Quartier.GrandYoff      -0.102546036 -0.047712169 -0.0340970914 -4.134315e-02
## Quartier.GueuleTapee    -0.091391745 -0.032350741  0.0878362064 -7.545233e-02
## Quartier.Hann            0.086501811  0.051897813 -0.0470829530  1.294282e-01
## Quartier.HLM            -0.032403718  0.048477765 -0.0684027811 -1.815278e-01
## Quartier.JetdEau        -0.113785193 -0.063247306  0.0643748502 -6.647781e-02
## Quartier.LiberteI       -0.147344456 -0.109599829 -0.0738864083 -1.625044e-02
## Quartier.LiberteVI      -0.023441992  0.124521350  0.0215211249 -1.342462e-01
## Quartier.Malika         -0.071834863  0.059897425 -0.2699835390 -1.199018e-02
## Quartier.Mamelles        0.378104386  0.174901410 -0.0714398841 -2.946188e-03
## Quartier.Medina         -0.176064461 -0.108547904  0.1126701146 -1.180343e-02
## Quartier.Mermoz         -0.051261025  0.055229138  0.0415570359  3.584298e-01
## Quartier.Ngor            0.119994523  0.209353302 -0.0113251581  9.288067e-02
## Quartier.NiayeCoker     -0.124003215 -0.077567116 -0.0392679972 -1.999202e-01
## Quartier.Parcelles       0.021041596  0.437417068  0.0451491306 -3.616002e-01
## Quartier.Pikine         -0.124003232 -0.077570455 -0.0756415702 -2.289628e-01
## Quartier.Plateau        -0.140677622 -0.027225363  0.5369006564  2.868468e-01
## Quartier.PointE          0.270719303  0.059349496 -0.0044585953  1.355403e-01
## Quartier.SacreCoeur      0.056218556  0.058350958 -0.2360511951  1.946422e-01
## Quartier.SacreCoeurIII   0.007211875  0.002172032 -0.1067888422  4.344549e-02
## Quartier.Yoff           -0.173102458 -0.104704562 -0.1478880341  4.256535e-02
##                                Comp 5
## SurfTerrain              1.777622e-06
## SurfHabitable            3.552860e-04
## SurfPiecResid           -3.051740e-02
## NbPieces                -9.701728e-02
## NbPiecesResid           -4.451819e-02
## NbSDB                   -2.046653e-01
## NbChamBur               -3.740840e-02
## NbSalonsSAM             -5.378093e-02
## NbWC                    -1.391414e-01
## NbCuis                  -6.374613e-02
## Type.Appart             -5.587752e-02
## Type.Villa               5.587752e-02
## Standing.Non             1.147788e-01
## Standing.Oui            -1.147788e-01
## Etat.Bon                 3.581194e-01
## Etat.Mediocre           -2.343795e-01
## Etat.Neuf               -4.983829e-02
## Etat.Vetuste            -2.166024e-01
## Jardin.Non               2.394500e-01
## Jardin.Oui              -2.394500e-01
## Cour.Non                -1.761303e-01
## Cour.Oui                 1.761303e-01
## Piscine.Non             -5.359399e-02
## Piscine.Oui              5.359399e-02
## Garage.Non               3.089216e-01
## Garage.Park             -4.778903e-01
## Garage.Priv1v           -8.774653e-03
## Garage.Priv2v            6.356304e-02
## Egout.Non                9.494935e-02
## Egout.Oui               -9.494935e-02
## HiTech.1inst             5.330116e-02
## HiTech.2inst            -4.708349e-02
## HiTech.Non              -2.194580e-02
## DistCtrVille.0           4.720810e-01
## DistCtrVille.1a5km      -2.346497e-01
## DistCtrVille.inf1km      1.697618e-01
## DistCtrVille.sup5km     -1.442871e-01
## Commerc.inf2km           3.365436e-01
## Commerc.sup2km          -3.365436e-01
## BordMer.inf2km          -2.907157e-01
## BordMer.sup2km           2.907157e-01
## Distractions.inf2km      2.560451e-01
## Distractions.sup2km     -2.560451e-01
## AxeRoutier.inf1km       -8.672357e-02
## AxeRoutier.sup1km        8.672357e-02
## StandingQuartier.bourge  1.106404e-01
## StandingQuartier.moy    -4.002392e-02
## StandingQuartier.popu   -7.120123e-02
## QuartierAffaires.Non    -5.729654e-01
## QuartierAffaires.Oui     5.729654e-01
## Quartier.Almadies       -1.157616e-01
## Quartier.BelAir         -1.133132e-01
## Quartier.Bopp            1.296944e-03
## Quartier.Castors         8.434794e-02
## Quartier.Colobane       -9.660266e-02
## Quartier.Derkle          4.560586e-02
## Quartier.Fann           -1.996885e-01
## Quartier.FannHock       -6.093475e-02
## Quartier.FannResidence   5.001309e-02
## Quartier.Fass            3.559837e-02
## Quartier.FenetreMermoz  -1.600580e-01
## Quartier.Foire          -1.485386e-01
## Quartier.GrandYoff      -3.642555e-01
## Quartier.GueuleTapee    -1.208158e-01
## Quartier.Hann           -1.160514e-01
## Quartier.HLM            -5.239876e-02
## Quartier.JetdEau        -1.086539e-01
## Quartier.LiberteI        7.222482e-02
## Quartier.LiberteVI      -5.278951e-02
## Quartier.Malika         -1.027358e-01
## Quartier.Mamelles        1.020130e-01
## Quartier.Medina          1.205860e-01
## Quartier.Mermoz         -1.691084e-01
## Quartier.Ngor            1.559469e-01
## Quartier.NiayeCoker      1.585103e-01
## Quartier.Parcelles      -1.093101e-01
## Quartier.Pikine          1.575745e-01
## Quartier.Plateau         4.720810e-01
## Quartier.PointE          2.906611e-02
## Quartier.SacreCoeur      2.337879e-01
## Quartier.SacreCoeurIII  -8.483835e-02
## Quartier.Yoff            5.377489e-02
# Interpretation of the predictive model built on the first component:
# regress the log of column 2 of logtsDK (the response, presumably the rent)
# on the scores stored in LDKLogYpls1, then multiply the coefficients stored
# in LDKLogYpls1 by the fitted slope (the intercept, element 1, is left out).
# NOTE(review): LDKLogYpls1 is built earlier in the file; the layout of its
# `coefficients` array is assumed to suit this product -- confirm.
regLDKc1 <- lm(log(logtsDK[, 2]) ~ LDKLogYpls1$scores)
LogLoyerModelPLS1 <- as.matrix(LDKLogYpls1$coefficients[, 1, ]) %*%
  as.matrix(coef(regLDKc1)[2])

# Interpretation of the predictive model built on the first 5 components:
# same construction as for one component, but the regression on the scores
# now yields five slopes (elements 2 to 6; element 1 is the intercept).
# NOTE(review): the layout of LDKLogYpls5$coefficients is assumed to suit
# this matrix product -- confirm against where LDKLogYpls5 is created.
regLDKc5 <- lm(log(logtsDK[, 2]) ~ LDKLogYpls5$scores)
LogLoyerModelPLS5 <- as.matrix(LDKLogYpls5$coefficients[, 1, ]) %*%
  as.matrix(coef(regLDKc5)[2:6])
# Slopes of the log-response on each of the five component scores
coef(regLDKc5)[2:6]
## LDKLogYpls5$scoresComp 1 LDKLogYpls5$scoresComp 2 LDKLogYpls5$scoresComp 3 
##              0.004253071              0.005680053              0.408712086 
## LDKLogYpls5$scoresComp 4 LDKLogYpls5$scoresComp 5 
##              0.100948886              0.277579705
# Variance-covariance matrix of the five component scores (in the recorded
# output the off-diagonal terms are numerically zero).
cov(LDKLogYpls5$scores)
##               Comp 1        Comp 2        Comp 3       Comp 4        Comp 5
## Comp 1  4.657292e+04  3.913470e-12  1.358349e-13 4.725423e-13 -2.217983e-14
## Comp 2  3.913470e-12  2.850608e+03 -1.290101e-14 4.461717e-14 -2.943782e-14
## Comp 3  1.358349e-13 -1.290101e-14  1.696126e+00 1.838755e-15  1.048034e-16
## Comp 4  4.725423e-13  4.461717e-14  1.838755e-15 1.248503e+01  1.778051e-16
## Comp 5 -2.217983e-14 -2.943782e-14  1.048034e-16 1.778051e-16  5.443803e-01
# Ridge regression

# glmnet with alpha = 0 fits penalised (ridge) regressions and lets us
# control the penalty coefficient lambda; without an explicit `lambda` it
# fits a whole sequence of values (100 in the printout below).
logLoy <- log(logtsDK[, 2])
fit1 <- glmnet(
  x = as.matrix(LDK),
  y = logLoy,
  family = "gaussian",
  alpha = 0
)
fit1
## 
## Call:  glmnet(x = as.matrix(LDK), y = logLoy, family = "gaussian", alpha = 0) 
## 
##     Df  %Dev Lambda
## 1   82  0.00 946.70
## 2   82  3.53 903.70
## 3   82  3.69 862.60
## 4   82  3.86 823.40
## 5   82  4.04 786.00
## 6   82  4.23 750.20
## 7   82  4.42 716.10
## 8   82  4.62 683.60
## 9   82  4.83 652.50
## 10  82  5.05 622.90
## 11  82  5.28 594.60
## 12  82  5.52 567.50
## 13  82  5.77 541.70
## 14  82  6.03 517.10
## 15  82  6.30 493.60
## 16  82  6.58 471.20
## 17  82  6.88 449.80
## 18  82  7.18 429.30
## 19  82  7.50 409.80
## 20  82  7.84 391.20
## 21  82  8.18 373.40
## 22  82  8.54 356.40
## 23  82  8.92 340.20
## 24  82  9.31 324.80
## 25  82  9.71 310.00
## 26  82 10.13 295.90
## 27  82 10.57 282.50
## 28  82 11.03 269.60
## 29  82 11.50 257.40
## 30  82 11.99 245.70
## 31  82 12.50 234.50
## 32  82 13.02 223.80
## 33  82 13.57 213.70
## 34  82 14.13 204.00
## 35  82 14.72 194.70
## 36  82 15.32 185.80
## 37  82 15.95 177.40
## 38  82 16.60 169.30
## 39  82 17.27 161.60
## 40  82 17.96 154.30
## 41  82 18.67 147.30
## 42  82 19.40 140.60
## 43  82 20.16 134.20
## 44  82 20.94 128.10
## 45  82 21.74 122.30
## 46  82 22.57 116.70
## 47  82 23.42 111.40
## 48  82 24.29 106.30
## 49  82 25.19 101.50
## 50  82 26.11  96.90
## 51  82 27.05  92.49
## 52  82 28.01  88.29
## 53  82 28.99  84.28
## 54  82 30.00  80.45
## 55  82 31.03  76.79
## 56  82 32.08  73.30
## 57  82 33.14  69.97
## 58  82 34.23  66.79
## 59  82 35.34  63.75
## 60  82 36.46  60.85
## 61  82 37.60  58.09
## 62  82 38.75  55.45
## 63  82 39.92  52.93
## 64  82 41.11  50.52
## 65  82 42.31  48.23
## 66  82 43.51  46.03
## 67  82 44.73  43.94
## 68  82 45.96  41.94
## 69  82 47.19  40.04
## 70  82 48.43  38.22
## 71  82 49.67  36.48
## 72  82 50.92  34.82
## 73  82 52.16  33.24
## 74  82 53.41  31.73
## 75  82 54.66  30.29
## 76  82 55.90  28.91
## 77  82 57.13  27.60
## 78  82 58.36  26.34
## 79  82 59.58  25.15
## 80  82 60.80  24.00
## 81  82 62.00  22.91
## 82  82 63.19  21.87
## 83  82 64.36  20.88
## 84  82 65.52  19.93
## 85  82 66.67  19.02
## 86  82 67.80  18.16
## 87  82 68.91  17.33
## 88  82 70.00  16.54
## 89  82 71.07  15.79
## 90  82 72.12  15.07
## 91  82 73.14  14.39
## 92  82 74.15  13.73
## 93  82 75.13  13.11
## 94  82 76.09  12.51
## 95  82 77.03  11.95
## 96  82 77.94  11.40
## 97  82 78.82  10.88
## 98  82 79.69  10.39
## 99  82 80.52   9.92
## 100 82 81.34   9.47
# How the coefficients evolve as lambda increases:
plot(fit1, xvar = "lambda")

# Choice of lambda by cross-validation.
# NOTE(review): no seed is set before cv.glmnet, so the fold assignment (and
# therefore the selected lambda) may differ from one run to the next.
cvfit1 <- cv.glmnet(
  x = as.matrix(LDK),
  y = logLoy,
  family = "gaussian",
  alpha = 0
)
plot(cvfit1) # curve of the MSE against log(lambda)

# Minimum value of the cross-validated MSE
print(min(cvfit1$cvm))
## [1] 0.3579884
# Lambda at which that minimum cross-validated MSE is reached
print(cvfit1[["lambda.min"]])
## [1] 9.466981
# Refit the ridge regression at the lambda selected by cross-validation.
# The value is read from cvfit1 rather than pasted in by hand, so the refit
# always matches the cross-validation actually run above (the previous
# literal, 9.466981, was a rounded copy of one particular run).
# NOTE(review): in the run recorded in this file, lambda.min coincides with
# the smallest lambda of the default path (9.47), so the CV optimum may lie
# at an even smaller lambda -- consider extending the lambda grid.
fit <- glmnet(
  x = as.matrix(LDK),
  y = logLoy,
  family = "gaussian",
  alpha = 0,
  lambda = cvfit1$lambda.min
)
# Coefficients of the resulting model:
coef(fit)
## 83 x 1 sparse Matrix of class "dgCMatrix"
##                                    s0
## (Intercept)              4.5504571462
## SurfTerrain              0.0001847839
## SurfHabitable            0.0003612079
## SurfPiecResid            0.0045285472
## NbPieces                 0.0087914863
## NbPiecesResid            0.0122634696
## NbSDB                    0.0477620380
## NbChamBur                0.0148572778
## NbSalonsSAM              0.0419043836
## NbWC                     0.0461536698
## NbCuis                   0.0837538080
## Type.Appart             -0.0628372650
## Type.Villa               0.0628458655
## Standing.Non            -0.0651306722
## Standing.Oui             0.0651397914
## Etat.Bon                 0.0377067427
## Etat.Mediocre           -0.0643994251
## Etat.Neuf                0.0383080260
## Etat.Vetuste            -0.0324734934
## Jardin.Non              -0.0494702152
## Jardin.Oui               0.0494770517
## Cour.Non                -0.0806814458
## Cour.Oui                 0.0806841932
## Piscine.Non             -0.0506623728
## Piscine.Oui              0.0506725636
## Garage.Non              -0.0693591791
## Garage.Park              0.0028997551
## Garage.Priv1v            0.0388881931
## Garage.Priv2v            0.0751981783
## Egout.Non                0.0485229866
## Egout.Oui               -0.0485280212
## HiTech.1inst             0.0221007368
## HiTech.2inst             0.0773392132
## HiTech.Non              -0.0404310853
## DistCtrVille.0           0.1134978890
## DistCtrVille.1a5km      -0.0020987183
## DistCtrVille.inf1km     -0.0334663693
## DistCtrVille.sup5km     -0.0144515885
## Commerc.inf2km           0.0046179802
## Commerc.sup2km          -0.0046187218
## BordMer.inf2km           0.0337037251
## BordMer.sup2km          -0.0337032019
## Distractions.inf2km      0.0732295375
## Distractions.sup2km     -0.0732299978
## AxeRoutier.inf1km        0.0629503755
## AxeRoutier.sup1km       -0.0629492205
## StandingQuartier.bourge  0.0957790192
## StandingQuartier.moy    -0.0183825207
## StandingQuartier.popu   -0.0753417853
## QuartierAffaires.Non    -0.0780218240
## QuartierAffaires.Oui     0.0780225067
## Quartier.Almadies        0.0305840265
## Quartier.BelAir          0.0186856223
## Quartier.Bopp           -0.0736677972
## Quartier.Castors        -0.0256732819
## Quartier.Colobane       -0.0744034727
## Quartier.Derkle         -0.0700682076
## Quartier.Fann            0.0698208589
## Quartier.FannHock        0.0791635446
## Quartier.FannResidence   0.1202105620
## Quartier.Fass           -0.0573161363
## Quartier.FenetreMermoz   0.0262481935
## Quartier.Foire          -0.0111182717
## Quartier.GrandYoff      -0.0842374423
## Quartier.GueuleTapee    -0.0226322829
## Quartier.Hann            0.0158698076
## Quartier.HLM            -0.0117097743
## Quartier.JetdEau        -0.0767583908
## Quartier.LiberteI       -0.0725814026
## Quartier.LiberteVI      -0.0203300633
## Quartier.Malika         -0.0790798776
## Quartier.Mamelles        0.0483702326
## Quartier.Medina         -0.0294313553
## Quartier.Mermoz          0.0303892493
## Quartier.Ngor            0.0546192055
## Quartier.NiayeCoker     -0.0401851870
## Quartier.Parcelles      -0.0238868286
## Quartier.Pikine         -0.0373187607
## Quartier.Plateau         0.1134952551
## Quartier.PointE          0.0561492437
## Quartier.SacreCoeur      0.0464885059
## Quartier.SacreCoeurIII  -0.0183166801
## Quartier.Yoff           -0.0438516877
# Largest lambda whose cross-validated MSE stays below the upper bound of
# the interval around min(MSE) (the "one standard error" rule)
cvfit1[["lambda.1se"]]
## [1] 12.51479
# Here R^2 = 0.81 (the %Dev of the 100th lambda of the ridge path), against
# R^2 = 0.97 for PLS with 5 components.
# Ridge coefficients (without intercept) at the 100th lambda of the path:
fit1[["beta"]][, 100]
##             SurfTerrain           SurfHabitable           SurfPiecResid 
##            0.0001849058            0.0003614481            0.0045305744 
##                NbPieces           NbPiecesResid                   NbSDB 
##            0.0087954064            0.0122674857            0.0477684989 
##               NbChamBur             NbSalonsSAM                    NbWC 
##            0.0148582520            0.0419034645            0.0461485294 
##                  NbCuis             Type.Appart              Type.Villa 
##            0.0837439606           -0.0628236394            0.0628294111 
##            Standing.Non            Standing.Oui                Etat.Bon 
##           -0.0651229391            0.0651265687            0.0377033877 
##           Etat.Mediocre               Etat.Neuf            Etat.Vetuste 
##           -0.0643893009            0.0382952106           -0.0324671938 
##              Jardin.Non              Jardin.Oui                Cour.Non 
##           -0.0494561339            0.0494613569           -0.0806689700 
##                Cour.Oui             Piscine.Non             Piscine.Oui 
##            0.0806730621           -0.0506404096            0.0506487428 
##              Garage.Non             Garage.Park           Garage.Priv1v 
##           -0.0693381409            0.0029068701            0.0388790812 
##           Garage.Priv2v               Egout.Non               Egout.Oui 
##            0.0751685591            0.0484981155           -0.0485062883 
##            HiTech.1inst            HiTech.2inst              HiTech.Non 
##            0.0220888837            0.0773116813           -0.0404177032 
##          DistCtrVille.0      DistCtrVille.1a5km     DistCtrVille.inf1km 
##            0.1135000405           -0.0020991186           -0.0334554875 
##     DistCtrVille.sup5km          Commerc.inf2km          Commerc.sup2km 
##           -0.0144577215            0.0046261711           -0.0046236760 
##          BordMer.inf2km          BordMer.sup2km     Distractions.inf2km 
##            0.0336949479           -0.0336974142            0.0732230848 
##     Distractions.sup2km       AxeRoutier.inf1km       AxeRoutier.sup1km 
##           -0.0732246821            0.0629453272           -0.0629462704 
## StandingQuartier.bourge    StandingQuartier.moy   StandingQuartier.popu 
##            0.0957745461           -0.0183845305           -0.0753407843 
##    QuartierAffaires.Non    QuartierAffaires.Oui       Quartier.Almadies 
##           -0.0780265142            0.0780252802            0.0305948912 
##         Quartier.BelAir           Quartier.Bopp        Quartier.Castors 
##            0.0186852685           -0.0736694215           -0.0256784523 
##       Quartier.Colobane         Quartier.Derkle           Quartier.Fann 
##           -0.0744036064           -0.0700699173            0.0698289656 
##       Quartier.FannHock  Quartier.FannResidence           Quartier.Fass 
##            0.0791599623            0.1201991115           -0.0573199056 
##  Quartier.FenetreMermoz          Quartier.Foire      Quartier.GrandYoff 
##            0.0262519457           -0.0111197092           -0.0842375595 
##    Quartier.GueuleTapee           Quartier.Hann            Quartier.HLM 
##           -0.0226370166            0.0158725258           -0.0117135723 
##        Quartier.JetdEau       Quartier.LiberteI      Quartier.LiberteVI 
##           -0.0767487326           -0.0725753958           -0.0203350243 
##         Quartier.Malika       Quartier.Mamelles         Quartier.Medina 
##           -0.0790798950            0.0483744096           -0.0294323851 
##         Quartier.Mermoz           Quartier.Ngor     Quartier.NiayeCoker 
##            0.0303927141            0.0546292303           -0.0401868043 
##      Quartier.Parcelles         Quartier.Pikine        Quartier.Plateau 
##           -0.0238978848           -0.0373183092            0.1134963241 
##         Quartier.PointE     Quartier.SacreCoeur  Quartier.SacreCoeurIII 
##            0.0561455048            0.0464827684           -0.0183167663 
##           Quartier.Yoff 
##           -0.0438491545
# Comparison of the PLS and Ridge coefficients:
# NOTE(review): only the ridge coefficients (100th lambda of the path) are
# printed here; the PLS coefficients (e.g. LogLoyerModelPLS5) are not shown
# next to them, so the comparison has to be done by eye -- confirm intent.
fit1[["beta"]][, 100]
##             SurfTerrain           SurfHabitable           SurfPiecResid 
##            0.0001849058            0.0003614481            0.0045305744 
##                NbPieces           NbPiecesResid                   NbSDB 
##            0.0087954064            0.0122674857            0.0477684989 
##               NbChamBur             NbSalonsSAM                    NbWC 
##            0.0148582520            0.0419034645            0.0461485294 
##                  NbCuis             Type.Appart              Type.Villa 
##            0.0837439606           -0.0628236394            0.0628294111 
##            Standing.Non            Standing.Oui                Etat.Bon 
##           -0.0651229391            0.0651265687            0.0377033877 
##           Etat.Mediocre               Etat.Neuf            Etat.Vetuste 
##           -0.0643893009            0.0382952106           -0.0324671938 
##              Jardin.Non              Jardin.Oui                Cour.Non 
##           -0.0494561339            0.0494613569           -0.0806689700 
##                Cour.Oui             Piscine.Non             Piscine.Oui 
##            0.0806730621           -0.0506404096            0.0506487428 
##              Garage.Non             Garage.Park           Garage.Priv1v 
##           -0.0693381409            0.0029068701            0.0388790812 
##           Garage.Priv2v               Egout.Non               Egout.Oui 
##            0.0751685591            0.0484981155           -0.0485062883 
##            HiTech.1inst            HiTech.2inst              HiTech.Non 
##            0.0220888837            0.0773116813           -0.0404177032 
##          DistCtrVille.0      DistCtrVille.1a5km     DistCtrVille.inf1km 
##            0.1135000405           -0.0020991186           -0.0334554875 
##     DistCtrVille.sup5km          Commerc.inf2km          Commerc.sup2km 
##           -0.0144577215            0.0046261711           -0.0046236760 
##          BordMer.inf2km          BordMer.sup2km     Distractions.inf2km 
##            0.0336949479           -0.0336974142            0.0732230848 
##     Distractions.sup2km       AxeRoutier.inf1km       AxeRoutier.sup1km 
##           -0.0732246821            0.0629453272           -0.0629462704 
## StandingQuartier.bourge    StandingQuartier.moy   StandingQuartier.popu 
##            0.0957745461           -0.0183845305           -0.0753407843 
##    QuartierAffaires.Non    QuartierAffaires.Oui       Quartier.Almadies 
##           -0.0780265142            0.0780252802            0.0305948912 
##         Quartier.BelAir           Quartier.Bopp        Quartier.Castors 
##            0.0186852685           -0.0736694215           -0.0256784523 
##       Quartier.Colobane         Quartier.Derkle           Quartier.Fann 
##           -0.0744036064           -0.0700699173            0.0698289656 
##       Quartier.FannHock  Quartier.FannResidence           Quartier.Fass 
##            0.0791599623            0.1201991115           -0.0573199056 
##  Quartier.FenetreMermoz          Quartier.Foire      Quartier.GrandYoff 
##            0.0262519457           -0.0111197092           -0.0842375595 
##    Quartier.GueuleTapee           Quartier.Hann            Quartier.HLM 
##           -0.0226370166            0.0158725258           -0.0117135723 
##        Quartier.JetdEau       Quartier.LiberteI      Quartier.LiberteVI 
##           -0.0767487326           -0.0725753958           -0.0203350243 
##         Quartier.Malika       Quartier.Mamelles         Quartier.Medina 
##           -0.0790798950            0.0483744096           -0.0294323851 
##         Quartier.Mermoz           Quartier.Ngor     Quartier.NiayeCoker 
##            0.0303927141            0.0546292303           -0.0401868043 
##      Quartier.Parcelles         Quartier.Pikine        Quartier.Plateau 
##           -0.0238978848           -0.0373183092            0.1134963241 
##         Quartier.PointE     Quartier.SacreCoeur  Quartier.SacreCoeurIII 
##            0.0561455048            0.0464827684           -0.0183167663 
##           Quartier.Yoff 
##           -0.0438491545
# LASSO

# Same call as for ridge, but alpha = 1 selects the lasso penalty; the whole
# lambda path is fitted.
fit2 <- glmnet(
  x = as.matrix(LDK),
  y = logLoy,
  family = "gaussian",
  alpha = 1
)
fit2
## 
## Call:  glmnet(x = as.matrix(LDK), y = logLoy, family = "gaussian", alpha = 1) 
## 
##     Df  %Dev  Lambda
## 1    0  0.00 0.94670
## 2    3  6.34 0.90370
## 3    3 13.15 0.86260
## 4    3 19.36 0.82340
## 5    3 25.02 0.78600
## 6    4 30.18 0.75020
## 7    4 34.89 0.71610
## 8    4 39.17 0.68360
## 9    4 43.08 0.65250
## 10   4 46.65 0.62290
## 11   4 49.89 0.59460
## 12   4 52.85 0.56750
## 13   4 55.54 0.54170
## 14   4 58.00 0.51710
## 15   5 60.27 0.49360
## 16   5 62.37 0.47120
## 17   5 64.27 0.44980
## 18   5 66.01 0.42930
## 19   6 67.62 0.40980
## 20   7 69.25 0.39120
## 21   7 70.76 0.37340
## 22   7 72.13 0.35640
## 23   8 73.39 0.34020
## 24   8 74.88 0.32480
## 25  10 76.28 0.31000
## 26  10 77.59 0.29590
## 27  10 78.81 0.28250
## 28  12 80.22 0.26960
## 29  12 81.56 0.25740
## 30  13 82.81 0.24570
## 31  13 83.96 0.23450
## 32  13 85.01 0.22380
## 33  13 85.96 0.21370
## 34  15 86.85 0.20400
## 35  15 87.83 0.19470
## 36  16 88.70 0.18580
## 37  16 89.52 0.17740
## 38  17 90.26 0.16930
## 39  17 90.94 0.16160
## 40  17 91.56 0.15430
## 41  18 92.13 0.14730
## 42  17 92.67 0.14060
## 43  17 93.16 0.13420
## 44  17 93.60 0.12810
## 45  17 94.01 0.12230
## 46  18 94.37 0.11670
## 47  17 94.71 0.11140
## 48  17 95.02 0.10630
## 49  17 95.29 0.10150
## 50  18 95.55 0.09690
## 51  18 95.78 0.09249
## 52  17 95.99 0.08829
## 53  17 96.18 0.08428
## 54  17 96.36 0.08045
## 55  18 96.52 0.07679
## 56  17 96.66 0.07330
## 57  18 96.80 0.06997
## 58  19 96.92 0.06679
## 59  19 97.03 0.06375
## 60  18 97.13 0.06085
## 61  20 97.23 0.05809
## 62  21 97.33 0.05545
## 63  21 97.43 0.05293
## 64  21 97.52 0.05052
## 65  21 97.60 0.04823
## 66  21 97.67 0.04603
## 67  22 97.74 0.04394
## 68  23 97.82 0.04194
## 69  23 97.90 0.04004
## 70  23 97.97 0.03822
## 71  25 98.06 0.03648
## 72  25 98.16 0.03482
## 73  28 98.25 0.03324
## 74  29 98.34 0.03173
## 75  29 98.43 0.03029
## 76  29 98.51 0.02891
## 77  30 98.60 0.02760
## 78  30 98.67 0.02634
## 79  32 98.75 0.02515
## 80  32 98.83 0.02400
## 81  32 98.90 0.02291
## 82  33 98.96 0.02187
## 83  35 99.02 0.02088
## 84  35 99.09 0.01993
## 85  35 99.14 0.01902
## 86  36 99.20 0.01816
## 87  37 99.25 0.01733
## 88  37 99.31 0.01654
## 89  36 99.35 0.01579
## 90  37 99.39 0.01507
## 91  38 99.42 0.01439
## 92  38 99.46 0.01373
## 93  39 99.49 0.01311
## 94  40 99.52 0.01251
## 95  40 99.55 0.01195
## 96  41 99.59 0.01140
## 97  41 99.61 0.01088
## 98  41 99.64 0.01039
## 99  41 99.67 0.00992
## 100 41 99.69 0.00947
# Lasso coefficient paths against log(lambda)
plot(fit2, xvar = "lambda")

# Choice of lambda by cross-validation.
# NOTE(review): no seed is set before cv.glmnet, so the fold assignment (and
# therefore the selected lambda) may differ from one run to the next.
cvfit2 <- cv.glmnet(
  x = as.matrix(LDK),
  y = logLoy,
  family = "gaussian",
  alpha = 1
)
plot(cvfit2) # curve of the MSE against log(lambda)

# Minimum value of the cross-validated MSE
min(cvfit2$cvm)
## [1] 0.1120604
# Lambda at which the minimum cross-validated MSE is reached.
# cvfit2$lambda.min is the lambda whose CV error is smallest, whereas
# min(cvfit2$lambda) only returns the smallest lambda of the grid, whatever
# its CV error. This mirrors what the ridge section does with cvfit1.
# NOTE(review): the value recorded just below came from min(cvfit2$lambda);
# it matches this statement only if the CV minimum sits at the end of the path.
cvfit2$lambda.min
## [1] 0.009466981
# Refit the lasso at the lambda selected by cross-validation.
# The value is read from cvfit2 rather than typed in by hand: the previous
# literal (0.00947) was a rounded copy of the smallest lambda of the grid,
# which is not necessarily the lambda minimising the cross-validated MSE.
# NOTE(review): the coefficients recorded below were obtained with
# lambda = 0.00947; they may change if lambda.min differs from that value.
fit_new <- glmnet(
  x = as.matrix(LDK),
  y = logLoy,
  family = "gaussian",
  alpha = 1,
  lambda = cvfit2$lambda.min
)
# Coefficients of the resulting model:
coef(fit_new)
## 83 x 1 sparse Matrix of class "dgCMatrix"
##                                    s0
## (Intercept)              3.817206e+00
## SurfTerrain              3.689737e-04
## SurfHabitable            .           
## SurfPiecResid            1.271086e-02
## NbPieces                 2.444574e-02
## NbPiecesResid            .           
## NbSDB                    9.525848e-02
## NbChamBur                .           
## NbSalonsSAM              9.199129e-02
## NbWC                     4.842380e-02
## NbCuis                   2.827871e-02
## Type.Appart             -2.507841e-01
## Type.Villa               .           
## Standing.Non             .           
## Standing.Oui             .           
## Etat.Bon                 1.140692e-03
## Etat.Mediocre           -1.947687e-01
## Etat.Neuf                .           
## Etat.Vetuste             .           
## Jardin.Non              -7.175928e-02
## Jardin.Oui               1.122933e-16
## Cour.Non                -7.461867e-01
## Cour.Oui                 7.690091e-14
## Piscine.Non              .           
## Piscine.Oui              .           
## Garage.Non              -3.349738e-01
## Garage.Park              4.319967e-02
## Garage.Priv1v            .           
## Garage.Priv2v            .           
## Egout.Non                .           
## Egout.Oui                .           
## HiTech.1inst             .           
## HiTech.2inst             .           
## HiTech.Non               .           
## DistCtrVille.0           6.827965e-01
## DistCtrVille.1a5km       .           
## DistCtrVille.inf1km      .           
## DistCtrVille.sup5km      .           
## Commerc.inf2km           1.486377e-01
## Commerc.sup2km          -1.086251e-16
## BordMer.inf2km           3.063656e-02
## BordMer.sup2km           .           
## Distractions.inf2km      5.182085e-01
## Distractions.sup2km      .           
## AxeRoutier.inf1km        1.940270e-01
## AxeRoutier.sup1km        .           
## StandingQuartier.bourge  4.561529e-01
## StandingQuartier.moy     .           
## StandingQuartier.popu   -1.801859e-01
## QuartierAffaires.Non    -3.708936e-03
## QuartierAffaires.Oui     .           
## Quartier.Almadies        .           
## Quartier.BelAir          .           
## Quartier.Bopp            .           
## Quartier.Castors         4.629276e-02
## Quartier.Colobane       -1.337399e-01
## Quartier.Derkle          5.411992e-01
## Quartier.Fann            3.776828e-02
## Quartier.FannHock        .           
## Quartier.FannResidence   2.513412e-01
## Quartier.Fass           -7.003918e-03
## Quartier.FenetreMermoz   .           
## Quartier.Foire           .           
## Quartier.GrandYoff      -2.926742e-01
## Quartier.GueuleTapee     .           
## Quartier.Hann            .           
## Quartier.HLM             9.730112e-02
## Quartier.JetdEau        -1.967823e-01
## Quartier.LiberteI        .           
## Quartier.LiberteVI      -1.258936e-01
## Quartier.Malika          .           
## Quartier.Mamelles        .           
## Quartier.Medina          .           
## Quartier.Mermoz          .           
## Quartier.Ngor            .           
## Quartier.NiayeCoker      .           
## Quartier.Parcelles       .           
## Quartier.Pikine          2.613676e-01
## Quartier.Plateau         1.072528e-01
## Quartier.PointE         -8.564901e-02
## Quartier.SacreCoeur      .           
## Quartier.SacreCoeurIII  -5.259822e-02
## Quartier.Yoff            .
# We obtain an R^2 of 0.99 (vs R^2 = 0.81 for Ridge and R^2 = 0.97 for PLS
# with 5 components).
# NOTE(review): these figures look like the in-sample %Dev of the fitted
# paths, not cross-validated scores -- confirm before comparing models.
# Lasso coefficients (without intercept) at the 100th lambda of the path:
fit2[["beta"]][, 100]
##             SurfTerrain           SurfHabitable           SurfPiecResid 
##            6.004233e-04            0.000000e+00            1.127415e-02 
##                NbPieces           NbPiecesResid                   NbSDB 
##            2.253262e-02            0.000000e+00            5.689767e-02 
##               NbChamBur             NbSalonsSAM                    NbWC 
##            0.000000e+00            1.164135e-01            6.307075e-02 
##                  NbCuis             Type.Appart              Type.Villa 
##            1.731411e-02           -2.327086e-01            2.109452e-13 
##            Standing.Non            Standing.Oui                Etat.Bon 
##            0.000000e+00            0.000000e+00            2.098806e-02 
##           Etat.Mediocre               Etat.Neuf            Etat.Vetuste 
##           -1.601989e-01            0.000000e+00            0.000000e+00 
##              Jardin.Non              Jardin.Oui                Cour.Non 
##           -4.312547e-02            1.531322e-13           -7.045781e-01 
##                Cour.Oui             Piscine.Non             Piscine.Oui 
##            3.519603e-13            0.000000e+00            0.000000e+00 
##              Garage.Non             Garage.Park           Garage.Priv1v 
##           -3.619065e-01            4.366410e-02            0.000000e+00 
##           Garage.Priv2v               Egout.Non               Egout.Oui 
##            0.000000e+00            0.000000e+00            0.000000e+00 
##            HiTech.1inst            HiTech.2inst              HiTech.Non 
##            0.000000e+00            0.000000e+00            0.000000e+00 
##          DistCtrVille.0      DistCtrVille.1a5km     DistCtrVille.inf1km 
##            7.709611e-01            0.000000e+00            0.000000e+00 
##     DistCtrVille.sup5km          Commerc.inf2km          Commerc.sup2km 
##            0.000000e+00            1.718972e-01            0.000000e+00 
##          BordMer.inf2km          BordMer.sup2km     Distractions.inf2km 
##            6.971288e-03            0.000000e+00            4.863005e-01 
##     Distractions.sup2km       AxeRoutier.inf1km       AxeRoutier.sup1km 
##           -1.296599e-03            2.123460e-01            0.000000e+00 
## StandingQuartier.bourge    StandingQuartier.moy   StandingQuartier.popu 
##            4.484426e-01            0.000000e+00           -1.799222e-01 
##    QuartierAffaires.Non    QuartierAffaires.Oui       Quartier.Almadies 
##            0.000000e+00            0.000000e+00            0.000000e+00 
##         Quartier.BelAir           Quartier.Bopp        Quartier.Castors 
##            0.000000e+00           -5.675849e-02            4.125753e-02 
##       Quartier.Colobane         Quartier.Derkle           Quartier.Fann 
##           -1.623327e-01            4.633412e-01            8.387169e-02 
##       Quartier.FannHock  Quartier.FannResidence           Quartier.Fass 
##            0.000000e+00            1.743841e-01           -3.304288e-02 
##  Quartier.FenetreMermoz          Quartier.Foire      Quartier.GrandYoff 
##            0.000000e+00            0.000000e+00           -3.214242e-01 
##    Quartier.GueuleTapee           Quartier.Hann            Quartier.HLM 
##            0.000000e+00            0.000000e+00            4.899427e-02 
##        Quartier.JetdEau       Quartier.LiberteI      Quartier.LiberteVI 
##           -2.288655e-01            0.000000e+00           -1.151605e-01 
##         Quartier.Malika       Quartier.Mamelles         Quartier.Medina 
##            0.000000e+00            0.000000e+00            0.000000e+00 
##         Quartier.Mermoz           Quartier.Ngor     Quartier.NiayeCoker 
##            4.865839e-03            0.000000e+00            0.000000e+00 
##      Quartier.Parcelles         Quartier.Pikine        Quartier.Plateau 
##            0.000000e+00            2.399992e-01            1.122180e-02 
##         Quartier.PointE     Quartier.SacreCoeur  Quartier.SacreCoeurIII 
##           -1.069969e-01            0.000000e+00           -6.740337e-02 
##           Quartier.Yoff 
##            0.000000e+00